Load needed packages to use haploreconstruct & haplovalidate

##HAPLORECONSTRUCT

#load functions (+ required packages) ###############################

#setwd("/Users/dagny/Dropbox (PopGen)/haplotypes/")
source("functions/EEC.freq.traj.R")
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 4.1.2
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:ggpubr':
## 
##     mutate
source("functions/EEC.manhattan.plot.R")
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.7             ggpubr_0.4.0           ggplot2_3.3.6         
##  [4] randomcoloR_1.1.0.1    haploReconstruct_0.1.3 haplovalidate_0.1.5   
##  [7] RColorBrewer_1.1-3     stringr_1.4.0          psych_2.1.9           
## [10] data.table_1.14.2     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.9         lattice_0.20-45    tidyr_1.1.4        zoo_1.8-9         
##  [5] gtools_3.9.2       digest_0.6.29      foreach_1.5.1      utf8_1.2.2        
##  [9] V8_4.2.2           R6_2.5.1           backports_1.4.1    evaluate_0.14     
## [13] pillar_1.8.0       gplots_3.1.1       rlang_1.0.4        curl_4.3.2        
## [17] rstudioapi_0.13    car_3.0-12         jquerylib_0.1.4    rmarkdown_2.11    
## [21] Rtsne_0.16         igraph_1.2.11      munsell_0.5.0      broom_0.7.10      
## [25] compiler_4.1.0     xfun_0.27          pkgconfig_2.0.3    mnormt_2.0.2      
## [29] tmvnsim_1.0-2      htmltools_0.5.2    tidyselect_1.1.2   tibble_3.1.8      
## [33] codetools_0.2-18   matrixStats_0.61.0 fansi_1.0.3        dplyr_1.0.9       
## [37] withr_2.5.0        bitops_1.0-7       grid_4.1.0         nlme_3.1-153      
## [41] jsonlite_1.7.2     gtable_0.3.0       lifecycle_1.0.1    magrittr_2.0.3    
## [45] scales_1.2.0       KernSmooth_2.23-20 carData_3.0-4      cli_3.3.0         
## [49] stringi_1.7.5      cachem_1.0.6       ggsignif_0.6.3     dbscan_1.1-10     
## [53] bslib_0.4.0        vctrs_0.4.1        generics_0.1.3     iterators_1.0.13  
## [57] tools_4.1.0        glue_1.6.2         purrr_0.3.4        abind_1.4-5       
## [61] parallel_4.1.0     fastmap_1.1.0      yaml_2.2.1         colorspace_2.0-3  
## [65] cluster_2.1.2      caTools_1.18.2     rstatix_0.7.0      knitr_1.36        
## [69] sass_0.4.2

#function for clustering ###############################

perform_clustering<-function(l.freqs, l.min.cl.cor=min.cl.cor, l.min.minor.freq=min.minor.freq, l.max.minor.freq=max.minor.freq,
                             l.min.freq.change=min.freq.change,l.min.repl=min.repl,l.min.cl.size=min.cl.size,
                             l.win.size=win.size,l.ngen=ngen,l.run.id="01",l.chr=c("2","3")){
  print("Initialize Time Series ...")
  l.markers<-data.frame("chr"=character(),"pos"=numeric(),"cluster"=numeric())
  #perform haplotype reconstruct for the chromosomes present in your data:
  for(l.chr.iter in l.chr){
  #format time series data 
  temp.freqs<-subset(l.freqs,chr==l.chr.iter)
  timeSeries<-initialize_SNP_time_series(chr=temp.freqs$chr,pos=temp.freqs$pos,base.freq=temp.freqs$basePops,
                                         lib.freqs=temp.freqs[,grep("L",colnames(temp.freqs)),with=F],pop.ident=rep(c(1:10),l.ngen),
                                         pop.generation = rep(seq(0,l.ngen*10-10,10),each=10),
                                         use.libs = rep(T,10*l.ngen), min.minor.freq=l.min.minor.freq,
                                         max.minor.freq = l.max.minor.freq, winsize = l.win.size, min.lib.frac = 0.75,
                                         minfreqchange = l.min.freq.change, win.scale = "bp", minrepl = l.min.repl)
 
  print(paste("Clustering on chromosome",l.chr.iter,"is running. Please be patient ...",sep=" "))
  hbs<-reconstruct_hb(timeSeries,chrom=l.chr.iter, min.cl.size=l.min.cl.size, min.cl.cor=l.min.cl.cor,min.inter=4,single.win=T)
  
  if (number_hbr(hbs)!=0){
      n.clusters<-number_hbr(hbs)
      for (j in c(1:n.clusters)){
        temp.markers<-data.frame("chr"=rep(l.chr.iter,length(markers(hbs,j))),"pos"=markers(hbs,j),"cluster"=rep(j,length(markers(hbs,j))))
        l.markers<-rbind(l.markers,temp.markers)
      }
  }
  }
  #arrange & format result 
  l.markers<-arrange(l.markers,chr,pos)
  l.markers$id<-paste(l.markers$chr,l.markers$cluster,sep=".")
  #save clustering as .rds 
  toSave<-paste("HaploReconstruct-Min-Cl-Corr",l.min.cl.cor,"-Min-Minor-Freq",l.min.minor.freq,
                "-Max-Minor-Freq",l.max.minor.freq,"-Min-Freq-Change",l.min.freq.change,"-Min-Repl",l.min.repl,
                "-Min-Cl-Size",l.min.cl.size,"-Win-Size",l.win.size,"Run-ID",l.run.id,".rds",sep="")
  saveRDS(hbs,file = toSave)
  return(l.markers)
}

obtain_colors<-function(x,y){
  n <- length(unique(x$id))
  palette <- distinctColorPalette(n)
  x$id<-factor(x$id)
  x$color<-x$id
  levels(x$color)<-palette
  z<-merge(x,y,by=c("chr","pos"))
  return(z)
}

#data ###############################

#target sites:
targets<-readRDS("data/929_targets_all.rds")
#ACER CMH results:
cmh<-readRDS("data/sim929_acer_cmh.rds")
colnames(cmh)<-c("chr","pos","value")
#sync file:
candSNP<-readRDS("data/sim929_cands.rds")

Explore the data

cmh
##        chr      pos    value
##     1:   2   196428 2.024998
##     2:   2   356282 1.595361
##     3:   2   498592 1.984070
##     4:   2   501458 1.028718
##     5:   2   504144 1.116426
##    ---                      
## 16850:   3 48383675 1.141732
## 16851:   3 48543097 1.364474
## 16852:   3 48586008 2.848238
## 16853:   3 48635159 1.593615
## 16854:   3 48899371 1.105892
#View(cmh)
candSNP
##       chr      pos ref riseallele fallallele   basePops         L1         L2
##    1:   2   196428   G          G          T 0.85921325 0.84210526 0.73913043
##    2:   2   356282   A          G          A 0.04624277 0.04347826 0.01785714
##    3:   2   498592   C          G          C 0.10642570 0.14634146 0.12195122
##    4:   2   708369   A          A          G 0.57319588 0.67500000 0.54098361
##    5:   2   980126   C          A          C 0.27845528 0.29166667 0.26190476
##   ---                                                                        
## 7082:   3  9904945   G          A          G 0.01169591 0.03571429 0.02325581
## 7083:   3 11264019   G          T          G 0.03125000 0.02173913 0.00000000
## 7084:   3 29532659   T          T          C 0.75104603 0.73134328 0.79069767
## 7085:   3 13611701   A          G          A 0.37404580 0.30909091 0.26666667
## 7086:   3 10421244   T          C          T 0.09919028 0.09375000 0.07547170
##               L3         L4         L5         L6         L7         L8
##    1: 0.94117647 0.86666667 0.86666667 0.90566038 0.83636364 0.78260870
##    2: 0.01886792 0.07017544 0.06896552 0.04347826 0.07843137 0.01754386
##    3: 0.17307692 0.09090909 0.05000000 0.06250000 0.10416667 0.12962963
##    4: 0.45652174 0.61702128 0.61290323 0.54347826 0.62790698 0.52083333
##    5: 0.21153846 0.46666667 0.20000000 0.31147541 0.17777778 0.35555556
##   ---                                                                  
## 7082: 0.00000000 0.00000000 0.02222222 0.01612903 0.02380952 0.00000000
## 7083: 0.02325581 0.09090909 0.00000000 0.06122449 0.00000000 0.00000000
## 7084: 0.83720930 0.76595745 0.63265306 0.78260870 0.75000000 0.75000000
## 7085: 0.32758621 0.51785714 0.39285714 0.26530612 0.44186047 0.41071429
## 7086: 0.04878049 0.06666667 0.17241379 0.14285714 0.05882353 0.11320755
##               L9        L10        L11        L12        L13        L14
##    1: 0.88709677 0.90476190 0.92424242 0.97142857 0.91836735 0.97777778
##    2: 0.03278689 0.08823529 0.03921569 0.09615385 0.10204082 0.08333333
##    3: 0.10909091 0.09090909 0.08955224 0.12962963 0.10909091 0.18333333
##    4: 0.60000000 0.54761905 0.64912281 0.61403509 0.59523810 0.62962963
##    5: 0.21428571 0.23809524 0.25423729 0.33333333 0.37254902 0.34615385
##   ---                                                                  
## 7082: 0.00000000 0.00000000 0.05000000 0.05660377 0.07575758 0.02000000
## 7083: 0.06000000 0.03125000 0.03773585 0.00000000 0.13953488 0.09090909
## 7084: 0.73913043 0.75675676 0.78378378 0.80952381 0.86792453 0.87931034
## 7085: 0.32500000 0.43939394 0.31818182 0.45454545 0.43396226 0.38775510
## 7086: 0.11764706 0.07407407 0.08333333 0.13333333 0.12765957 0.07692308
##              L15        L16        L17        L18        L19        L20
##    1: 0.97727273 0.90322581 0.92452830 0.86538462 1.00000000 0.93103448
##    2: 0.12500000 0.07547170 0.00000000 0.09433962 0.15094340 0.06521739
##    3: 0.10869565 0.24444444 0.06818182 0.10000000 0.12765957 0.17647059
##    4: 0.69047619 0.72972973 0.60784314 0.70175439 0.70689655 0.65116279
##    5: 0.40909091 0.19148936 0.28888889 0.41304348 0.34545455 0.36170213
##   ---                                                                  
## 7082: 0.01818182 0.02380952 0.01785714 0.06000000 0.02083333 0.00000000
## 7083: 0.09259259 0.02500000 0.06521739 0.03636364 0.10416667 0.00000000
## 7084: 0.65789474 0.83076923 0.71052632 0.82608696 0.77500000 0.81818182
## 7085: 0.45652174 0.34615385 0.39534884 0.45098039 0.33333333 0.42857143
## 7086: 0.06250000 0.20512821 0.03333333 0.10344828 0.02500000 0.10204082
##              L21        L22        L23        L24        L25        L26
##    1: 0.92307692 0.88888889 0.88636364 0.96000000 0.91111111 0.90740741
##    2: 0.11475410 0.12820513 0.08510638 0.09523810 0.24590164 0.14545455
##    3: 0.07142857 0.14285714 0.16949153 0.10204082 0.17777778 0.16981132
##    4: 0.63793103 0.64062500 0.58000000 0.65714286 0.68292683 0.66666667
##    5: 0.23913043 0.31111111 0.37735849 0.30909091 0.34042553 0.30508475
##   ---                                                                  
## 7082: 0.01754386 0.02222222 0.07142857 0.06000000 0.19607843 0.01724138
## 7083: 0.00000000 0.04347826 0.08771930 0.04761905 0.08510638 0.07547170
## 7084: 0.82352941 0.79166667 0.81818182 0.88461538 0.82692308 0.72549020
## 7085: 0.30952381 0.43859649 0.44897959 0.45652174 0.56250000 0.52000000
## 7086: 0.15686275 0.13636364 0.20754717 0.07843137 0.18965517 0.09259259
##              L27        L28        L29        L30        L31        L32
##    1: 0.91176471 0.95833333 1.00000000 0.93333333 0.94444444 0.93617021
##    2: 0.04166667 0.06382979 0.07692308 0.04651163 0.17948718 0.04918033
##    3: 0.14285714 0.28070175 0.20370370 0.18181818 0.21739130 0.18604651
##    4: 0.58536585 0.59649123 0.61818182 0.52173913 0.81578947 0.82812500
##    5: 0.41818182 0.44000000 0.37500000 0.35294118 0.36956522 0.32258065
##   ---                                                                  
## 7082: 0.01818182 0.04444444 0.00000000 0.00000000 0.06122449 0.00000000
## 7083: 0.02325581 0.11475410 0.07692308 0.03508772 0.02500000 0.02272727
## 7084: 0.80769231 0.75409836 0.78688525 0.85714286 0.84905660 0.85245902
## 7085: 0.29166667 0.47916667 0.50000000 0.55555556 0.45283019 0.57777778
## 7086: 0.12765957 0.15000000 0.12068966 0.04347826 0.21153846 0.10000000
##             L33        L34       L35       L36        L37        L38        L39
##    1: 0.9772727 0.93617021 0.9230769 0.9565217 0.95555556 0.93023256 0.91666667
##    2: 0.1020408 0.04444444 0.1016949 0.1764706 0.07692308 0.11111111 0.12068966
##    3: 0.1190476 0.20338983 0.2000000 0.1000000 0.06818182 0.17307692 0.11764706
##    4: 0.7659574 0.59259259 0.6346154 0.6857143 0.65116279 0.67741935 0.71186441
##    5: 0.4000000 0.51162791 0.4042553 0.4655172 0.28260870 0.32142857 0.24444444
##   ---                                                                          
## 7082: 0.0000000 0.01818182 0.1590909 0.1111111 0.00000000 0.05357143 0.00000000
## 7083: 0.1836735 0.06976744 0.1250000 0.1489362 0.01587302 0.07692308 0.02272727
## 7084: 0.8703704 0.85365854 0.7884615 0.7636364 0.84905660 0.73076923 0.81818182
## 7085: 0.4303797 0.65909091 0.5555556 0.4615385 0.42553191 0.37500000 0.33928571
## 7086: 0.1923077 0.11111111 0.3396226 0.1269841 0.10869565 0.13636364 0.06521739
##              L40        L41        L42        L43        L44       L45
##    1: 1.00000000 0.93617021 0.91836735 0.95918367 0.98113208 0.9583333
##    2: 0.07692308 0.05357143 0.05454545 0.15873016 0.06122449 0.1363636
##    3: 0.06451613 0.21276596 0.14814815 0.15517241 0.19444444 0.3589744
##    4: 0.72000000 0.73770492 0.64583333 0.65384615 0.65000000 0.5961538
##    5: 0.35416667 0.34090909 0.28571429 0.39655172 0.38095238 0.3518519
##   ---                                                                 
## 7082: 0.02127660 0.00000000 0.02272727 0.08695652 0.04838710 0.2075472
## 7083: 0.09433962 0.01851852 0.00000000 0.18750000 0.17021277 0.2727273
## 7084: 0.90909091 0.88333333 0.90909091 0.83076923 0.88888889 0.8983051
## 7085: 0.54901961 0.40983607 0.60000000 0.53703704 0.55555556 0.7169811
## 7086: 0.12765957 0.05405405 0.07142857 0.10416667 0.13793103 0.2500000
##              L46        L47        L48        L49        L50        L51
##    1: 0.96296296 0.95000000 0.92307692 0.90566038 0.92857143 0.89130435
##    2: 0.06818182 0.12195122 0.15000000 0.06451613 0.05357143 0.13461538
##    3: 0.25000000 0.04878049 0.22222222 0.18518519 0.11864407 0.15000000
##    4: 0.79069767 0.71111111 0.64444444 0.66666667 0.73076923 0.78260870
##    5: 0.34615385 0.28947368 0.56521739 0.42307692 0.35897436 0.55102041
##   ---                                                                  
## 7082: 0.11764706 0.07500000 0.02173913 0.00000000 0.02040816 0.00000000
## 7083: 0.03389831 0.07272727 0.21666667 0.09090909 0.03508772 0.03703704
## 7084: 0.80952381 0.86792453 0.90384615 0.82926829 0.78688525 0.87500000
## 7085: 0.44155844 0.33928571 0.43636364 0.51851852 0.58333333 0.42857143
## 7086: 0.17647059 0.06976744 0.27083333 0.10869565 0.03636364 0.09677419
##              L52        L53        L54       L55        L56        L57
##    1: 0.83333333 1.00000000 0.98039216 0.9250000 0.95744681 0.91525424
##    2: 0.04651163 0.06896552 0.11320755 0.1914894 0.07843137 0.08510638
##    3: 0.16666667 0.06666667 0.25490196 0.1777778 0.24528302 0.07142857
##    4: 0.70175439 0.69090909 0.62222222 0.6666667 0.75000000 0.51562500
##    5: 0.26415094 0.39534884 0.34693878 0.3953488 0.34615385 0.43137255
##   ---                                                                 
## 7082: 0.00000000 0.16981132 0.18750000 0.3333333 0.11666667 0.07017544
## 7083: 0.08695652 0.26229508 0.14000000 0.6274510 0.18000000 0.06000000
## 7084: 0.95384615 0.91489362 0.91489362 0.8846154 0.79629630 0.81666667
## 7085: 0.54545455 0.50980392 0.61538462 0.7358491 0.39622642 0.28571429
## 7086: 0.13333333 0.24489796 0.07142857 0.2950820 0.11363636 0.31707317
##              L58        L59        L60        L61       L62       L63       L64
##    1: 0.92307692 0.90909091 0.95161290 0.98113208 0.9661017 1.0000000 0.9705882
##    2: 0.10000000 0.22222222 0.02173913 0.20338983 0.0000000 0.1951220 0.1296296
##    3: 0.32203390 0.08888889 0.17500000 0.33333333 0.1891892 0.1914894 0.2241379
##    4: 0.70909091 0.77777778 0.60000000 0.85416667 0.8750000 0.7924528 0.7580645
##    5: 0.32558140 0.36956522 0.64705882 0.31707317 0.2000000 0.3200000 0.5000000
##   ---                                                                          
## 7082: 0.06666667 0.00000000 0.01724138 0.00000000 0.0000000 0.1800000 0.2400000
## 7083: 0.15384615 0.09615385 0.00000000 0.00000000 0.1463415 0.2916667 0.3750000
## 7084: 0.78787879 0.81818182 0.77358491 0.94230769 0.9285714 0.8888889 0.9583333
## 7085: 0.51851852 0.45283019 0.64814815 0.51612903 0.6071429 0.4893617 0.6415094
## 7086: 0.12765957 0.08000000 0.09090909 0.09615385 0.0800000 0.3188406 0.3529412
##             L65        L66        L67        L68        L69        L70
##    1: 0.9791667 0.98387097 0.96825397 0.89090909 1.00000000 0.86792453
##    2: 0.3023256 0.15254237 0.05769231 0.09090909 0.15686275 0.04444444
##    3: 0.2407407 0.32432432 0.07547170 0.32692308 0.27906977 0.13333333
##    4: 0.7200000 0.78260870 0.65454545 0.68518519 0.64102564 0.63265306
##    5: 0.4814815 0.44186047 0.58620690 0.45833333 0.54901961 0.48979592
##   ---                                                                 
## 7082: 0.3400000 0.09090909 0.06122449 0.06382979 0.00000000 0.00000000
## 7083: 0.6315789 0.09090909 0.07017544 0.14814815 0.07142857 0.05769231
## 7084: 0.8214286 0.85106383 0.83333333 0.96000000 0.96078431 0.75409836
## 7085: 0.5849057 0.56521739 0.37037037 0.39655172 0.57500000 0.59090909
## 7086: 0.4230769 0.28571429 0.20000000 0.07407407 0.11904762 0.12500000
#View(cands)

cmh is storing a table with 1) chromosome 2) position 3)log10(pval), from CMH. This includes signficant and non-significant snps

candSNP includes 1) chromosome 2) position 3) ref allele 4) rising allele 5) falling allele 6) mean starting/base frequency 7-76) frequencies of each replicate across each time point (sorted by time points). To save some memory, cands is prefiltered, but usually this should have the same number of rows as cmh.

(If needed, how to create a cands file?)

parameters for clustering

min.minor.freq<-0
max.minor.freq<-1
min.freq.change<-0.15
min.repl<-2
min.cl.size<-20
win.size<-3e+06
ngen<-7
min.cl.cor<-0.6

#actual clustering

stringent.cluster<-perform_clustering(l.freqs = candSNP,l.run.id = "001",l.min.cl.cor = min.cl.cor)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
table(stringent.cluster$id)
## 
## 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.1 3.2 3.3 3.4 3.5 3.6 3.7 
## 138  69  20  45 136 111  20  54  38 878  60  31  23  25  52 134
relaxed.cluster<-perform_clustering(l.freqs = candSNP,l.run.id = "002",l.min.cl.cor = 0.2)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used! 
##             Be aware that the used correlation threshold is extremely weak
##             and the chance is high that unrelateted things are clustered together!
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used! 
##             Be aware that the used correlation threshold is extremely weak
##             and the chance is high that unrelateted things are clustered together!
table(relaxed.cluster$id)
## 
##  2.1  2.2  2.3  3.1  3.2  3.3  3.4 
## 1212 1032  591  101 2475 1195   26

#plotting:

stringent.cluster<-obtain_colors(stringent.cluster,cmh)
#png("res/demo-stringent.cluster.png",width=750,height=500)
print(get.plot.highlight(data = cmh,l.main="Stringent Clustering",highlight = stringent.cluster,highlight.col = stringent.cluster$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

#dev.off()

#allele frequency trajectories:
#png("res/demo-stringent.cluster.af.png",width=750,height=500)
print(get.af.traj(l.sync=candSNP,l.cluster=stringent.cluster,l.cluster.id = "2.1"))

#dev.off()

relaxed.cluster<-obtain_colors(relaxed.cluster,cmh)
#png("res/demo-relaxed.cluster.png",width = 750,height = 500)
print(get.plot.highlight(data=cmh,l.main = "Relaxed Clustering",highlight = relaxed.cluster,highlight.col = relaxed.cluster$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

#dev.off()

#png("res/demo-relaxed.cluster.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = relaxed.cluster,l.cluster.id = "2.1"))

#dev.off()

HAPLORECONSTRUCT SOLUTIONS

Task 0

table(targets$chr)
## 
## 2 3 
## 7 8
#7 targets on chr 2, 8 on chr 3 
ggscatter(data=targets,x="freq",y="s")

#negative, non-linear relationship betweenn selection coefficient and starting allele frequency 

Task 1

#number of blocks: 
length(table(stringent.cluster$id))
## [1] 16
length(table(relaxed.cluster$id))
## [1] 7
obtain_length<-function(x){
  range_temp<-c()
  for(id.iter in unique(x$id)){
    temp<-subset(x,id==id.iter)
    min_temp<-min(temp$pos)
    max_temp<-max(temp$pos)
    range_temp<-c(range_temp,max_temp-min_temp)
  }
  return(range_temp)
}

#SNP number ~ min.cl.cor
marker_SNPs<-data.frame(n=c(table(stringent.cluster$id),table(relaxed.cluster$id)))
marker_SNPs$type<-c(rep("stringent",length(unique(stringent.cluster$id))),rep("relaxed",length(unique(relaxed.cluster$id))))
ggboxplot(data=marker_SNPs,x="type",y="n")

#block length ~ min.cl.cor
block_length<-data.frame(length=c(obtain_length(stringent.cluster),obtain_length(relaxed.cluster)))
block_length$type<-c(rep("stringent",length(unique(stringent.cluster$id))),rep("relaxed",length(unique(relaxed.cluster$id))))
ggboxplot(data=block_length,x="type",y="length")

Task 2

table(targets$pos%in%stringent.cluster$pos)
## 
## FALSE  TRUE 
##     9     6
table(targets$pos%in%relaxed.cluster$pos)
## 
## FALSE  TRUE 
##     3    12
id_stringent<-which(targets$pos%in%stringent.cluster$pos)
pos_stringent<-targets$pos[id_stringent]
table(stringent.cluster$id[stringent.cluster$pos%in%pos_stringent])
## 
## 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.1 3.2 3.3 3.4 3.5 3.6 3.7 
##   1   0   0   0   0   0   1   1   0   1   0   0   0   1   0   1
#stringent: blocks without targets 

id_relaxed<-which(targets$pos%in%relaxed.cluster$pos)
pos_relaxed<-targets$pos[id_relaxed]
table(relaxed.cluster$id[relaxed.cluster$pos%in%pos_relaxed])
## 
## 2.1 2.2 2.3 3.1 3.2 3.3 3.4 
##   3   2   2   0   2   2   1
#relaxed: blocks with multiple targets

targets$stringent<-rep("No", length(targets$stringent))
targets$stringent[id_stringent]<-"Yes"
targets$stringent<-as.factor(targets$stringent)
ggscatter(targets,x="freq",y="s",color = "stringent")

#higher s with same freq tend to be caught

Task 3

relaxed.cluster.small<-perform_clustering(candSNP,l.min.cl.cor = 0.2,l.win.size = 5e+05)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used! 
##             Be aware that the used correlation threshold is extremely weak
##             and the chance is high that unrelateted things are clustered together!
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used! 
##             Be aware that the used correlation threshold is extremely weak
##             and the chance is high that unrelateted things are clustered together!
relaxed.cluster.big<-perform_clustering(candSNP,l.min.cl.cor = 0.2, l.win.size = 1e+07)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used! 
##             Be aware that the used correlation threshold is extremely weak
##             and the chance is high that unrelateted things are clustered together!
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used! 
##             Be aware that the used correlation threshold is extremely weak
##             and the chance is high that unrelateted things are clustered together!
r<-obtain_colors(relaxed.cluster.small,cmh)
#png("res/demo-relaxed.cluster.s.png",width = 750,height = 500)
print(get.plot.highlight(data=cmh,l.main = "Relaxed Clustering small",highlight = r,highlight.col = r$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

#dev.off()

#png("res/demo-relaxed.cluster.s.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = r,l.cluster.id = "2.1"))

#dev.off()

r<-obtain_colors(relaxed.cluster.big,cmh)
#png("res/demo-relaxed.cluster.b.png",width = 750,height = 500)
print(get.plot.highlight(data=cmh,l.main = "Relaxed Clustering big",highlight = r,highlight.col = r$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

#dev.off()

#png("res/demo-relaxed.cluster.b.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = r,l.cluster.id = "2.1"))

#dev.off()


length(table(relaxed.cluster.big$id))
## [1] 3
length(table(relaxed.cluster$id))
## [1] 7
length(table(relaxed.cluster.small$id))
## [1] 19
#more clusters with decreasing window size 

stringent.cluster.small<-perform_clustering(candSNP,l.min.cl.cor = 0.6,l.win.size = 5e+05)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
stringent.cluster.big<-perform_clustering(candSNP,l.min.cl.cor = 0.6,l.win.size = 1e+07)
## [1] "Initialize Time Series ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 2 is running. Please be patient ..."
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "Clustering on chromosome 3 is running. Please be patient ..."
r<-obtain_colors(stringent.cluster.big,cmh)

#png("res/demo-stringent.cluster.b.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = r,l.cluster.id = "2.1"))

#dev.off()

#png("res/demo-stringent.cluster.b.png",width=750,height=500)
print(get.plot.highlight(data = cmh,l.main="Stringent Clustering big",highlight = r,highlight.col = r$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

#dev.off()

r<-obtain_colors(stringent.cluster.small,cmh)


#png("res/demo-stringent.cluster.s.af.png",width = 750,height = 500)
print(get.af.traj(l.sync = candSNP,l.cluster = r,l.cluster.id = "2.1"))

#dev.off()

#png("res/demo-stringent.cluster.s.png",width=750,height=500)
print(get.plot.highlight(data = cmh,l.main="Stringent Clustering small",highlight = r,highlight.col = r$color))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

#dev.off()

length(table(stringent.cluster.big$id))
## [1] 16
length(table(stringent.cluster$id))
## [1] 16
length(table(stringent.cluster.small$id))
## [1] 16

HAPLOVALIDATE

Define information of our sample

repl <- 1:10 #replicates id
gens <- seq(0,60,10) #generations sampled
ngen=length(gens) #number of time points sampled
nrep=length(repl) #number of replicates

##parameters for haplovalidate##
# define which columns contain the base population (e.g. the first 5 columns)
base.pops <- c(rep(TRUE, length(repl)),rep(FALSE,length(repl)*(ngen-1)))
#define which columns should be included in the analysis
compare <- rep(TRUE,length(repl)*ngen)
# define which column contains which replicate 
pop.ident <- rep(repl,ngen)
#define which column contains which generation
pop.generation  <- rep(gens, each=length(repl))

Load simulated data

#define our working directory
wdir="/Users/sduarri/Dropbox (PopGen)/Sara/year2/eecourse/day3-haplotypes/"
#wdir="~/Dropbox (PopGen)/Sara/year2/eecourse/haplotypes/"
setwd(wdir) #note , run directly on terminal :)
#load cmh result and our simulated data
cmh.raw <-  readRDS(paste0(wdir, "data/", "sim929_acer_cmh.rds"))
cands.all <- readRDS(paste0(wdir, "data/","sim929_cands.rds"))

cmh is storing a table with 1) chromosome 2) position 3)log10(pval), from CMH. This includes signficant and non-significant snps

Cands includes 1) chromosome 2) position 3) ref allele 4) rising allele 5) falling allele 6) mean starting/base frequency 7-76) frequencies of each replicate across each time point (sorted by time points). To save some memory, cands is prefiltered, but usually this should have the same number of rows as cmh.

(If needed, how to create a cands file?)

#from a sync file

# define which columns of the sync file contain the base population (e.g. the first 5 columns)
#base.pops <- c(rep(TRUE, length(repl)),rep(FALSE,length(repl)*(ngen-1)))

# define which columns should be used to polarize for the rising allele
#list(c(F0Rep1,FNRep1),c(F0Rep2,FNRep2),...))
#polaRise =list()
#for (r in repl){
 # polaRise[[r]]=(c(r, (r+nrep*(ngen-1))))
#}

#cands.all <- sync_to_frequencies(syncfile,base.pops=base.pops,header=FALSE,mincov=15,polaRise = polaRise)

It can also be created from a vcf file 1. obtain a AF file, this usually provides ref & alt, and usually tracks af for alt snp (check if that is your case) 2. define median starting frequency for each SNP & store it 3. define median “end” frequency (generation that we want to polarize AFC for) 4a. if AFC end-starting (median frequency) is positive, no need to change AF. rising = alt, falling = ref 4b. if AFC is negative change all frequencies for the SNP to 1-AF. rising = ref, falling = alt

Get significant snps, and their AF

cmh <- cmh.raw[score>1.3,]
cands= merge(cmh[,.(chr,pos)],cands.all,on=.(chr,pos))

Define windows size for each chromosome

parameters <- get.mncs.win(cands,cmh, wins=seq(0.1,10,0.05),0.01)

print(parameters)
##    chr  win
## 1:   2 2.75
## 2:   3 4.05

Run haplovalidate!

start <- Sys.time()

happy <- haplovalidate(cands,cmh,parameters=parameters,pop.ident=pop.ident,pop.generation=pop.generation,base.pops=base.pops,compare=compare,takerandom=2000,filterrange=5000)
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minfreqchange is below the recommend change
##             of 0.2, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : The set minrepl parameter is below the recommend number
##             of 3, keep this in mind when working with the results.
## Warning in validity_initialize_SNP_time_series(chr = chr, pos = pos, base.freq = base.freq, : A too large range of starting frequencies is not 
##           recommended because it can result in too noisy correlations.
##           Consider to modify parameters 'min.minor.freq' and 'max.minor.freq'.
##           This parameter setting should only be used for exploratory purposes.
## [1] "chr 2 cluster corr 0.9"
## [1] "found 0 clusters"
## [1] "chr 2 cluster corr 0.8"
## [1] "found 0 clusters"
## [1] "chr 2 cluster corr 0.7"
## [1] "found 6 clusters"
## [1] "chr 2 cluster corr 0.6"
## [1] "found 8 clusters"
## [1] "chr 2 cluster corr 0.5"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, :
## min.cl.cor below 0.6 is not recommended.
## [1] "found 15 clusters"
## [1] "chr 2 cluster corr 0.4"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, :
## min.cl.cor below 0.6 is not recommended.
## [1] "found 14 clusters"
## [1] "chr 2 cluster corr 0.3"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used! 
##             Be aware that the used correlation threshold is extremely weak
##             and the chance is high that unrelateted things are clustered together!
## [1] "found 8 clusters"
## [1] "chr 3 cluster corr 0.9"
## [1] "found 0 clusters"
## [1] "chr 3 cluster corr 0.8"
## [1] "found 2 clusters"
## [1] "chr 3 cluster corr 0.7"
## [1] "found 7 clusters"
## [1] "chr 3 cluster corr 0.6"
## [1] "found 6 clusters"
## [1] "chr 3 cluster corr 0.5"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, :
## min.cl.cor below 0.6 is not recommended.
## [1] "found 8 clusters"
## [1] "chr 3 cluster corr 0.4"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, :
## min.cl.cor below 0.6 is not recommended.
## [1] "found 15 clusters"
## [1] "chr 3 cluster corr 0.3"
## Warning in validity_reconstruct_hb(object, chrom, min.cl.size, min.cl.cor, : min.cl.cor below 0.4 should not be used! 
##             Be aware that the used correlation threshold is extremely weak
##             and the chance is high that unrelateted things are clustered together!
## [1] "found 7 clusters"
## [1] "chr 2 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ...............[1] " pval 0.0395 for chr 2"
## [1] "chr 2 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ........[1] " pval 0.2527 for chr 2"
## [1] "chr 2 corr 0.7"
## [1] "found 7 clusters"
## [1] "corr  0.7"
## ......[1] " pval 0.0002 for chr 2"
## [1] "chr 2 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ........[1] " pval 0.2527 for chr 2"
## [1] "found 7 clusters"
## [1] "found 8 clusters"
## [1] "corr  0.69"
## .......[1] " pval 0 for chr 2"
## [1] "found 7 clusters"
## [1] "found 8 clusters"
## [1] "corr  0.68"
## .......[1] " pval 0 for chr 2"
## [1] "found 8 clusters"
## [1] "found 9 clusters"
## [1] "corr  0.67"
## ........[1] " pval 0.0006 for chr 2"
## [1] "found 7 clusters"
## [1] "found 11 clusters"
## [1] "corr  0.66"
## .......[1] " pval 0.0194 for chr 2"
## [1] "found 7 clusters"
## [1] "found 10 clusters"
## [1] "corr  0.65"
## .......[1] " pval 0.0207 for chr 2"
## [1] "found 8 clusters"
## [1] "found 8 clusters"
## [1] "corr  0.64"
## ........[1] " pval 0.062 for chr 2"
## [1] "Success!"
## [1] "chr 2 done"
##      chr corr clust      pos      tag
##   1:   2 0.64     1  6597257 2_0.64_1
##   2:   2 0.64     1  6597852 2_0.64_1
##   3:   2 0.64     1  6601865 2_0.64_1
##   4:   2 0.64     1  6609701 2_0.64_1
##   5:   2 0.64     1  6612063 2_0.64_1
##  ---                                 
## 520:   2 0.64     8 31614678 2_0.64_8
## 521:   2 0.64     8 31618854 2_0.64_8
## 522:   2 0.64     8 31620365 2_0.64_8
## 523:   2 0.64     8 31651886 2_0.64_8
## 524:   2 0.64     8 32253838 2_0.64_8
## [1] "chr 3 corr 0.4"
## [1] "found 15 clusters"
## [1] "corr  0.4"
## ...............[1] " pval 0.4692 for chr 3"
## [1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 corr 0.6"
## [1] "found 8 clusters"
## [1] "corr  0.6"
## ......[1] "chr 3 corr 0.5"
## [1] "found 15 clusters"
## [1] "corr  0.5"
## ........[1] " pval 0.1434 for chr 3"
## [1] "chr 3 done; no  threshold!"
#happy <- haplovalidate(cands,cmh,parameters=parameters,takerandom=2000,filterrange=5000, repl = repl, gens = gens)
end <- Sys.time()

#plot dominant blocks (will be stored in your working directory)
plot.haplovalidate(blocks=happy$dominant_haplotypes,cmh,title="sim929",label=T)
##       chr      pos                 tag groupcol
##    1:   3 29587701 3_0.5_7_noThreshold  #3288BD
##    2:   3 29587148 3_0.5_7_noThreshold  #3288BD
##    3:   3 29587623 3_0.5_7_noThreshold  #3288BD
##    4:   3 29587119 3_0.5_7_noThreshold  #3288BD
##    5:   3 29587549 3_0.5_7_noThreshold  #3288BD
##   ---                                          
## 2279:   3 11202507 3_0.5_1_noThreshold  #D7D5A7
## 2280:   3 10710848 3_0.5_1_noThreshold  #D7D5A7
## 2281:   3 11180560 3_0.5_1_noThreshold  #D7D5A7
## 2282:   3 10902786 3_0.5_1_noThreshold  #D7D5A7
## 2283:   3 10074133 3_0.5_1_noThreshold  #D7D5A7
#if needed save the data
saveRDS(happy,paste0(wdir,"results/", "sim929_hapval.rds"))

Check if we detected selected targets

happy_dom <- happy$dominant_haplotypes

#cmh <-  readRDS(paste0(wdir, "data/", "sim929_acer_cmh.rds"))
#cands.all <- readRDS(paste0(wdir, "data/","sim929_cands.rds"))

#load selected targets
targets <-  readRDS(paste0(wdir, "data/", "929_targets_all.rds"))
happy.cmh <- merge(cmh,happy_dom,by=c("chr","pos"))
targets.cmh <- merge(cmh,targets,by=c("chr","pos"))

#plot
par(mfrow=c(2,1))
for (i in 2:3){
cmh[chr==i,plot(pos,score,pch=19,col="grey")]
happy.cmh[chr==i,points(pos,score,col=factor(tag),pch=19)]
targets.cmh[chr==i,points(pos,score,col="orange",pch=19)]
}